---
title: "simulation-placebo-version-MMAD3"
author: "Rebecca Strauch"
date: "02/02/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
This file replicates the simulation of the placebo variable.
Due to time restraints, we create an exclusive a script for replicable purposes. 

```{r}
rm(list = ls())
library(readr)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(stargazer)
library(interplot)
library(lubridate)
df <- read.csv(file.path("df-rev.csv.gz"), stringsAsFactors = F)


#assign variable type
df$date <- as.Date(df$date)
df$country <- as.character(df$country)

#create lagged variables
df <- 
  df%>%
  group_by(country) %>%
  mutate(lag_users_total = dplyr::lag(users_total, n = 1, default = NA),
         lag_protest = dplyr::lag(protest, n = 1, default = NA),
         lag_protest_number = dplyr::lag(protest_number, n = 1, default = NA))


#create few and severe protest variable 
df$low_number <- 0
df$low_number[df$lag_protest_number == 1] <- 1

df$high_number <- 0
df$high_number[df$lag_protest_number > 1] <- 1

#Generate Weekdays 
df$wday <- wday(as.Date(df$date,'%d-%m-%Y'), label=TRUE)
df$wday <- as.factor(df$wday)
```
 
The process below describes how we redistribute the number of few and severe protests by 1000 simulations. 

```{r, results='asis', warning=FALSE}
###Create alternative protest variable `placebo': The variable shares the same amount of 0 and 1 of few and severe protests but the values are distributed randomly within units of countries and years.

sample <- df %>%
    group_by(country, year) %>%
    dplyr::select(users_total, high_number, low_number, wday)

###We use three different sets of seeds. 
seeds <- c(1:1000)

###Run Simulation of country, year and weekday FEs
result_low <- vector("numeric", 1000) ###create container to store results 
result_high <- vector("numeric", 1000)
for (seed in seeds) {
  set.seed(seed)
  sample <- sample %>%
    group_by(country, year) %>%
    mutate(placebo_low_number = sample(low_number), 
           placebo_high_number = sample(high_number),
           lag_users_total = dplyr::lag(users_total, n = 1, default = NA),
           lag_placebo_low_number = dplyr::lag(placebo_low_number , n = 1, default = NA),
           lag_placebo_high_number = dplyr::lag(placebo_high_number , n = 1, default = NA)) 
  model <- lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_placebo_low_number +lag_placebo_high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=sample)
 result_low[seed] <- coef(summary(model))["lag_placebo_low_number","Estimate"]
 result_high[seed] <- coef(summary(model))["lag_placebo_high_number","Estimate"]
}


###Run Simulation of country * year and weekday FEs
result_low2 <- vector("numeric", 1000) ###create container to store results 
result_high2 <- vector("numeric", 1000)
for (seed in seeds) {
  set.seed(seed)
  sample <- sample %>%
    group_by(country, year) %>%
    mutate(placebo_low_number = sample(low_number), 
           placebo_high_number = sample(high_number),
           lag_users_total = dplyr::lag(users_total, n = 1, default = NA),
           lag_placebo_low_number = dplyr::lag(placebo_low_number , n = 1, default = NA),
           lag_placebo_high_number = dplyr::lag(placebo_high_number , n = 1, default = NA)) 
  model2 <- lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_placebo_low_number +lag_placebo_high_number + as.factor(country)*as.factor(year) + as.factor(wday), data=sample)
 result_low2[seed] <- coef(summary(model2))["lag_placebo_low_number","Estimate"]
 result_high2[seed] <- coef(summary(model2))["lag_placebo_high_number","Estimate"]
}

quantile(result_high, c(.90, .95, .99)) # estimate at 0.013
quantile(result_high2, c(.90, .95, .99)) # estimate at 0.011
```

```{r}
###save vectors
save(result_low, file = "result_low-V3.rda")
save(result_high, file = "result_high-V3.rda")
save(result_low2, file = "result_low2-V3.rda")
save(result_high2, file = "result_high2-V3.rda")
```